package com.catmiw.funds.tool;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helper that pulls the text content of HTML table rows and
 * cells out of a page.
 *
 * This is not an HTML parser: it rewrites row/cell tags into plain-text
 * markers, deletes the remaining tags it recognises and then scans for
 * rows made of a limited set of characters.
 */
public class HtmlTool {

	// Characters tolerated between the tag name and '>' in an opening row or
	// cell tag. The range from ')' to '/' also admits '*', '+', ',', '-' and
	// '.'. The single quote is not in the set, so tags with single-quoted
	// attribute values are not recognised by the row/cell patterns.
	private static final String TAG_ATTRS = "[\"\\s=:#;\\w.\\(\\)-/%]*";

	// Opening row tag. The word boundary keeps longer tag names that merely
	// start with "tr" (such as "track") from being treated as rows.
	private static final Pattern ROW_OPEN = Pattern.compile("<[tT][rR]\\b" + TAG_ATTRS + ">");

	// Closing row tag.
	private static final Pattern ROW_CLOSE = Pattern.compile("</[tT][rR]>");

	// Opening cell tag, with the same word-boundary guard as ROW_OPEN.
	private static final Pattern CELL_OPEN = Pattern.compile("<[tT][dD]\\b" + TAG_ATTRS + ">");

	// Closing cell tag.
	private static final Pattern CELL_CLOSE = Pattern.compile("</[tT][dD]>");

	// Any other tag made only of word characters, quotes, '=', space, ':',
	// ';', '?' and the range from '&' to '/'. Tags containing anything else
	// (for example '#', '%' or a line break) are left in the text.
	private static final Pattern OTHER_TAG = Pattern.compile("</?[\\w\"'= :;\\?&-/]*>");

	// One complete row in marker form. Row content may hold spaces, word
	// characters, '(', ')', '*', '+', ',', '-', '.', '%', ':', '#' and
	// characters from U+1000 to U+FFFF. The slash is not in the set, so a
	// match can never run across a "#/tr#" marker into the following row.
	private static final Pattern ROW = Pattern.compile("#tr#[ \\w(#td#)-.%:[\u1000-\uffff]]*#/tr#");

	/**
	 * Extracts table rows from an HTML fragment as lists of cell texts.
	 *
	 * Steps: row and cell tags are replaced by the markers "#tr#", "#/tr#"
	 * and "#td#" (closing cell tags are dropped), remaining recognised tags
	 * are removed, line breaks ("\r\n" and "\n") are deleted, and every
	 * marker-delimited row is split on "#td#".
	 *
	 * Things callers should be aware of:
	 * - A row whose content holds a character outside the accepted set
	 *   (for example a tab, '/', '&' or ';') is skipped entirely.
	 * - Each row's text is split with String.split, so when nothing
	 *   precedes the first cell the list starts with an empty string, and
	 *   trailing empty cells are not included.
	 * - Header cells (th) are not treated as cells; their tags are simply
	 *   removed like any other tag.
	 *
	 * @param html the HTML text to scan; may be null
	 * @return one fixed-size list of cell texts per recognised row, in
	 *         document order; empty when html is null or holds no
	 *         recognisable row
	 */
	public static List<List<String>> handleTable(String html){
		List<List<String>> tab=new ArrayList<List<String>>();
		if (html==null){
			// Nothing to scan: report "no rows" instead of failing inside the matcher.
			return tab;
		}

		// Turn the table structure into plain-text markers.
		String text=ROW_OPEN.matcher(html).replaceAll("#tr#");
		text=ROW_CLOSE.matcher(text).replaceAll("#/tr#");
		text=CELL_OPEN.matcher(text).replaceAll("#td#");
		text=CELL_CLOSE.matcher(text).replaceAll("");

		// Drop the remaining recognised tags, then join everything onto one line.
		text=OTHER_TAG.matcher(text).replaceAll("");
		text=text.replace("\r\n", "");
		text=text.replace("\n", "");

		Matcher rows=ROW.matcher(text);
		while (rows.find()){
			String row=rows.group();
			row=row.replace("#/tr#", "");
			row=row.replace("#tr#", "");
			tab.add(Arrays.asList(row.split("#td#")));
		}
		return tab;
	}

	/**
	 * Manual check: downloads one fixed page through HttpTool.getHtml and
	 * prints the raw HTML followed by a separator line.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		String html=HttpTool.getHtml("http://www.sse.com.cn/sseportal/webapp/datapresent/SSEQueryCompanyStatement?isAdvQuery=1&PRODUCTID=600000&REPORTTYPE2=DQBG&REPORTTYPE=ALL");
		System.out.println(html);
		System.out.println("---------------");
		// Table extraction is left switched off here; uncomment to print the parsed rows.
		//List<List<String>> tab=handleTable(html);
		//System.out.println(tab);
	}

}
